Subway stations’ accesibility features

subway_cleaned = read_csv("Data/cleaned_subway_data.csv")

# Importing NYC map
nyc_map = st_read(here::here('NYC', 'nyc.shp'), quiet = TRUE)
nycmap = st_transform(nyc_map, crs = 4326)

Here we investigate features and the overall accessibility of the stations in order to gain insights how accessible NYC subway stations are.

ADA compliances

subway_cleaned %>% 
  ggplot() +
    geom_sf(
      data = nyc_map, fill = NA
    ) + 
    geom_point(
      aes(x = station_longitude, y = station_latitude, color = ada),
      size = 2.5, alpha = 0.5) +
  coord_sf() +
  theme_void(base_size = 10) +
  theme(legend.position = 'bottom') +
  guides(color = guide_legend(
    title.position = "top",
    override.aes = list(size = 3))) +
  scale_color_manual(values = c("FALSE" = "aquamarine3", "TRUE" = "slateblue3")) +
  labs(color = "ADA Compliance")

ada compliance ensures that individuals with disabilities can safely and easily access subway stations. In the plot, each dot represent a station. We can tell that NYC’s stations do not perform well on this issue…

Free Cross Over

subway_cleaned %>% 
  ggplot() +
    geom_sf(
      data = nyc_map, fill = NA
    ) + 
    geom_point(
      aes(x = station_longitude, y = station_latitude, color = free_crossover),
      size = 2.5, alpha = 0.5) +
  coord_sf() +
  theme_void(base_size = 10) +
  theme(legend.position = 'bottom') +
  guides(color = guide_legend(
    title.position = "top",
    override.aes = list(size = 3))) +
  labs(color = "Free Crossover")

As the most complex subway system in the world, however, NYC Subway provides free_crossover for most of its stations.With this understanding of both the shortcomings and strengths in terms of ADA compliance and free crossovers, we now turn our attention to evaluating the overall accessibility performance of NYC’s subway stations as a whole.

How accessible stations are overall?

Rank the accessibility of stations,

# Select the Variables for Clustering
clustering_data <- subway_cleaned %>%
  dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name)

set.seed(123)  # For reproducibility
km_result <- clustering_data %>% 
  dplyr::select(-station_latitude, -station_longitude, -station_name) %>% 
  kmodes(modes = 3)

We cluster the overall accessibility in three groups(levels) with the features of accessibility inside the stations.

clustering_data$.cluster <- factor(km_result$cluster)

scenario_counts <- clustering_data %>%
  mutate(
    scenario = case_when(
      ada == "TRUE" & free_crossover == "TRUE" ~ "ADA=TRUE, Free=TRUE",
      ada == "FALSE" & free_crossover == "FALSE" ~ "ADA=FALSE, Free=FALSE",
      ada == "FALSE" & free_crossover == "TRUE" ~ "ADA=FALSE, Free=TRUE",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(scenario)) %>%
  group_by(scenario, .cluster) %>%
  summarise(n = n(), .groups = "drop")

plot_ly(
  data = scenario_counts,
  x = ~scenario,
  y = ~n,
  color = ~.cluster,
  type = "bar"
) %>%
  layout(
    title = "Count of Stations by Scenario and Cluster",
    xaxis = list(title = "Scenario"),
    yaxis = list(title = "Count of Stations"),
    barmode = "group",
    bargap = 0.2
  )

From the plot, we can tell the stations with with highest accessibility is less likely to have neither ada nor free_crossover, and the lowest accessibility stations vice versa. Also, in the scenario where the stations only have free_crossover, the more accessible this station is, the less likely it will happen. However, most of the stations only have the basic amenities free_crossover.

# Define a Mode function
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

cluster_profiles <- clustering_data %>%
  group_by(.cluster) %>%
  dplyr::select(-station_latitude, -station_longitude) %>% 
  dplyr::select(station_name, everything()) %>% 
  summarise(across(everything(), ~ Mode(.x)))

knitr::kable(cluster_profiles)
.cluster station_name entrance_type staffing ada free_crossover
1 59th St Stair FULL FALSE TRUE
2 25th St Stair FULL FALSE TRUE
3 36th St Stair FULL FALSE TRUE

As we display the result of our model, the all of the dominant features tend to be same across the clusters. This result happens due to the un-even distribution of these categorical variables, which can be reflected in our visualization section.

clustering_data <- clustering_data %>% 
  mutate(
    accessibility_level = case_when(
      .cluster == 1 ~ "High Accessibility",
      .cluster == 2 ~ "Medium Accessibility",
      .cluster == 3 ~ "Low Accessibility"
    ),
    accessibility_level = factor(accessibility_level, 
                                 levels = c("High Accessibility", "Medium Accessibility", "Low Accessibility"))
  )

pal <- leaflet::colorFactor(
  palette = c("chartreuse", "darkgoldenrod1", "brown2"),  # Adjust colors as needed
  domain = clustering_data$accessibility_level
)

leaflet() |>
  addTiles() |>  
  addCircleMarkers(data = clustering_data,
             lng = ~station_longitude,
             lat = ~station_latitude,
             label = ~station_name,
             radius = 4,
             color = NA,
             # color = ~pal(accessibility_level),
             fillColor = ~pal(accessibility_level),
             stroke = TRUE, fillOpacity = 0.75,
             popup = ~paste("Ada:", ada,
                            "<br> Staffing:", staffing,
                            "<br> Entrance type:", entrance_type,
                            "<br> Free crossover:", free_crossover)) |>
  addProviderTiles(providers$CartoDB.Positron) |>
  addLegend(
    "bottomright",
    pal = pal,
    values = clustering_data$accessibility_level,
    title = "Accessibility Level",
    opacity = 1
  )

Now, let’s take a look at where high and low accesibility station reside in our city. It appears that the borough/area with high concentration of low accessibility stations are Downtown Manhhatan, Bronx, and Downtown Brooklyn.

When considering restroom access,

subway_with_restroom = read_csv("Data/cleaned_subway_restroom_data.csv")

subway_with_restroom <- subway_with_restroom %>% 
   mutate(
#     #convert to logical
     restroom_changing_stations_logic = as.logical(restroom_changing_stations),
     restroom_status_logic = as.logical(restroom_status),
     restroom_accessibility = fct_explicit_na(restroom_accessibility, na_level = "Unknown"),
     restroom_open = fct_explicit_na(restroom_open, na_level = "Unknown")
   )
# Select the Variables for Clustering
clustering_merged_data <- subway_with_restroom %>%
  dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name,
                restroom_open, restroom_accessibility,restroom_changing_stations_logic, restroom_status_logic)
             
set.seed(123)  # For reproducibility
km_result2 <- clustering_merged_data %>% 
  dplyr::select(-station_latitude, -station_longitude, -station_name) %>% 
  klaR::kmodes(modes = 3)

clustering_merged_data$.cluster <- factor(km_result2$cluster)


# Define a Mode function
Mode_for_merged <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

cluster_merged_profiles <- clustering_merged_data %>%
  group_by(.cluster) %>%
  dplyr::select(-station_latitude, -station_longitude) %>% 
  dplyr::select(station_name, everything()) %>% 
  summarise(across(everything(), ~ Mode_for_merged(.x)))

knitr::kable(cluster_merged_profiles)
.cluster station_name entrance_type staffing ada free_crossover restroom_open restroom_accessibility restroom_changing_stations_logic restroom_status_logic
1 36th St Stair FULL FALSE TRUE Year Round Fully Accessible FALSE TRUE
2 45th St Stair FULL FALSE TRUE Year Round Unknown FALSE TRUE
3 25th St Stair FULL FALSE FALSE Year Round Not Accessible FALSE TRUE
clustering_merged_data <- clustering_merged_data %>% 
  mutate(
    accessibility_level = case_when(
      .cluster == 1 ~ "High Accessibility",
      .cluster == 2 ~ "Medium Accessibility",
      .cluster == 3 ~ "Low Accessibility"
    )
  )

pal <- leaflet::colorFactor(
  palette = c("chartreuse", "darkgoldenrod1", "brown2"),  # Adjust colors as needed
  domain = clustering_merged_data$accessibility_level
)

leaflet() |>
  addTiles() |>  
  addCircleMarkers(data = clustering_merged_data,
             lng = ~station_longitude,
             lat = ~station_latitude,
             label = ~station_name,
             radius = 4,
             color = NA,
             # color = ~pal(accessibility_level),
             fillColor = ~pal(accessibility_level),
             stroke = TRUE, fillOpacity = 0.75,
             popup = ~paste("Ada:", ada,
                            "<br> Staffing:", staffing,
                            "<br> Entrance type:", entrance_type,
                            "<br> Free crossover:", free_crossover)) |>
  addProviderTiles(providers$CartoDB.Positron) |>
  addLegend(
    "bottomright",
    pal = pal,
    values = clustering_merged_data$accessibility_level,
    title = "Accessibility Level",
    opacity = 1
  )